Introduction

Load libraries

Define colors

Load single cell RNA-Seq data

Extended Data Figure 7a

Extended Data Figure 7d

Redefine scRepertoire function

To be able to change few ploting parameters of the clonalHomeostasis function, we copy/paste and modified some function from scRepertoire

Open clonotype data

n= 29690 TCR with EXACTLY one alpha chain CDR3 and one beta chain CDR3

Add transcriptomic data

if(params$accessibility == "unlock"){

  srat <- RenameCells(srat,new.names = paste0(srat$patient, "_",gsub("(_1|_2|_3|_4|_5|_6|_7|_8|_9)*", "",colnames(srat))))
  srat <- RenameCells(srat,new.names = gsub("_post", "",colnames(srat)))

  srat$sample <- gsub("_post", "",srat$patient)

  seurat <- combineExpression(combined, srat, 
                  cloneTypes = c(Rare = .00005, Small = .0005, 
                        Medium = .005, Large = .05, Hyperexpanded = 1),
                        cloneCall = "strict", 
                  group.by = "sample", chain = "both",
                  proportion = TRUE,
                  filterNA = TRUE)



  s    <- subset(seurat, subset = anno_l1 %in% c("CD8+ T cells","CD4+ T cells", "Proliferating cells", "Tregs", "Natural killer cells"))

  s$cloneType <- droplevels(s$cloneType)

  s@meta.data$Pathological.Response <- factor(s@meta.data$Pathological.Response, levels = c("pCR", "non-pCR"))

  table(s$anno_l1)
  table(s$sample, s$cloneType)

} else{
  print("Please request access to the BCR-Seq data.")
}
##            
##             Hyperexpanded (0.05 < X <= 1) Large (0.005 < X <= 0.05) Medium (5e-04 < X <= 0.005)
##   NeoBCC004                             0                       276                         887
##   NeoBCC005                             0                       572                        1782
##   NeoBCC006                           109                       588                        1052
##   NeoBCC007                           175                       322                         551
##   NeoBCC008                            50                       109                         213
##   NeoBCC010                             7                        16                           0
##   NeoBCC011                           382                       326                         945
##   NeoBCC012                            29                        62                         112
##   NeoBCC014                           181                       133                        1280
##   NeoBCC015                           193                       827                        1318
##   NeoBCC017                             0                       600                         674
##   NeoBCC018                           247                       463                         892
##            
##             Small (5e-05 < X <= 5e-04)
##   NeoBCC004                       1034
##   NeoBCC005                       2920
##   NeoBCC006                          0
##   NeoBCC007                        624
##   NeoBCC008                          0
##   NeoBCC010                          0
##   NeoBCC011                        765
##   NeoBCC012                          0
##   NeoBCC014                          0
##   NeoBCC015                       1020
##   NeoBCC017                        610
##   NeoBCC018                        751

We mapped 26528 TCR to the scRNA-Seq data, including 23099 to annotated T cells (10699 CD4 T cells, 1951 Tregs and 9517 CD8 T cells). Only 3 patients do not have hyperexpanded clones (2 non-pCR and 1 pCR).

if(params$accessibility == "unlock"){


  p <- SCpubr::do_DimPlot(sample = s, reduction = "umap", label = FALSE, repel = TRUE, label.color = "white", 
                         legend.position = "none", split.by = "cloneType", font.size = 12,pt.size = 1, ncol = 5,colors.use = colors_clonotype)
  print(p)
  
  p <- SCpubr::do_DimPlot(sample = s, reduction = "umap", label = FALSE, repel = TRUE, label.color = "white", 
                         legend.position = "none", group.by = "cloneType", font.size = 12,pt.size = 1,  colors.use = colors_clonotype)


  DT::datatable(p$data, 
              caption = ("EDF7d1"),
              extensions = 'Buttons', 
              options = list( dom = 'Bfrtip',
              buttons = c( 'csv', 'excel')))
} else{
  print("Please request access to the BCR-Seq data.")
}

if(params$accessibility == "unlock"){

  combined_T <- lapply(combined, function(x) {x[x$barcode %in% colnames(s),] })
  merge_combined_T <- bind_rows(combined_T, .id = "sample")



  c_pCR <- combined_T[c("NeoBCC008",
           "NeoBCC007", 
           "NeoBCC012",
           "NeoBCC017" )]
  p3 <- clonalHomeostasis_m(c_pCR, chain = "both") +
    theme(axis.text.x=element_blank())
  p3$labels$x <- "pCR"


  c_nonpCR <- combined_T[c( 
                        "NeoBCC010",
                        "NeoBCC014",
                        "NeoBCC011",
                        "NeoBCC004",
                        "NeoBCC005",
                        "NeoBCC018",
                        "NeoBCC006",
                        "NeoBCC015"
 )]
  p4 <- clonalHomeostasis_m(c_nonpCR, chain = "both") +
    theme(axis.text.x=element_blank())
  p4$labels$y <- " "
  p4$labels$x <- "non-pCR"

  p <- p3 + p4 + plot_layout(ncol = 2,  widths  = c(1,2),  guides = "collect")

  print(p)
  DT::datatable(rbind(p3$data, p4$data), 
              caption = ("EDF7d1"),
              extensions = 'Buttons', 
              options = list( dom = 'Bfrtip',
              buttons = c( 'csv', 'excel')))
} else{
  print("Please request access to the BCR-Seq data.")
}

Extended Data Figure 7b

if(params$accessibility == "unlock"){

  Idents(s) <- s$anno_l2

  tmp <- data.frame(grabMeta(s), identity = s$anno_l2, get.coord(s, "umap"))
 
    
  tmp$include <- ifelse(tmp$Frequency >= 0.05, "Yes", NA)
  tmp2 <- subset(tmp, include == "Yes")
  p1 <- ggplot(tmp2, mapping = aes(x=tmp2$UMAP_1, y = tmp2$UMAP_2)) +
    geom_point(tmp, mapping = aes(x=tmp$UMAP_1, y = tmp$UMAP_2, color = tmp[,"identity"]), size= 1) +
    geom_density_2d(color = "black", lwd=0.5, bins = 8) + 
    theme_minimal() +
    labs(color = "T cell subsets") +
     theme(text = element_text(size = 26), legend.key.width = unit(1, "cm"), legend.position = "right", legend.text = element_text(size=20)) + guides(colour = guide_legend(override.aes = list(size=8))) +
    scale_colour_manual(values = colors_anno_l2, breaks = levels(s$anno_l2)) +
    xlab("UMAP 1") + ylab("UMAP 2") + ggtitle(" Density of \n hyperexpanded")
  print(p1)
  
  DT::datatable(p1$data, 
              caption = ("Extended Data Figure 7b"),
              extensions = 'Buttons', 
              options = list( dom = 'Bfrtip',
              buttons = c( 'csv', 'excel')))

   
  tmp <- data.frame(grabMeta(s), identity = s$anno_l2, get.coord(s, "umap"))
  tmp$include <- ifelse(tmp$Frequency >= 0.005 & tmp$Frequency <= 0.05 , "Yes", NA)
  tmp2 <- subset(tmp, include == "Yes")
  p2 <- ggplot(tmp2, mapping = aes(x=tmp2$UMAP_1, y = tmp2$UMAP_2)) +
    geom_point(tmp, mapping = aes(x=tmp$UMAP_1, y = tmp$UMAP_2, color = tmp[,"identity"]), size= 1) +
    geom_density_2d(color = "black", lwd=0.5, bins = 8) + 
    theme_minimal() +
    labs(color = "T cell subsets") +
     theme(text = element_text(size = 26), legend.key.width = unit(1, "cm"), legend.position = "right", legend.text = element_text(size=20)) + guides(colour = guide_legend(override.aes = list(size=8))) +
    scale_colour_manual(values = colors_anno_l2, breaks = levels(s$anno_l2)) +
    xlab("UMAP 1") + ylab("UMAP 2") + ggtitle(" Density of \n large clones") 
  print(p2)
  
   DT::datatable(p2$data, 
              caption = ("Extended Data Figure 7b"),
              extensions = 'Buttons', 
              options = list( dom = 'Bfrtip',
              buttons = c( 'csv', 'excel')))
  
  tmp <- data.frame(grabMeta(s), identity = s$anno_l2, get.coord(s, "umap"))
  tmp$include <- ifelse(tmp$Frequency >= 0.0005 & tmp$Frequency <= 0.005 , "Yes", NA)
  tmp2 <- subset(tmp, include == "Yes")
  p3 <- ggplot(tmp2, mapping = aes(x=tmp2$UMAP_1, y = tmp2$UMAP_2)) +
    geom_point(tmp, mapping = aes(x=tmp$UMAP_1, y = tmp$UMAP_2, color = tmp[,"identity"]), size= 1) +
    geom_density_2d(color = "black", lwd=0.5, bins = 8) + 
    theme_minimal() +
    labs(color = "T cell subsets") +
     theme(text = element_text(size = 26), legend.key.width = unit(1, "cm"), legend.position = "right", legend.text = element_text(size=20)) + guides(colour = guide_legend(override.aes = list(size=8))) +
    scale_colour_manual(values = colors_anno_l2, breaks = levels(s$anno_l2)) +
    xlab("UMAP 1") + ylab("UMAP 2") + ggtitle(" Density of \n medium clones") 
  print(p3)
  
   DT::datatable(p3$data, 
              caption = ("Extended Data Figure 7b"),
              extensions = 'Buttons', 
              options = list( dom = 'Bfrtip',
              buttons = c( 'csv', 'excel')))
  
  tmp <- data.frame(grabMeta(s), identity = s$anno_l2, get.coord(s, "umap"))
  tmp$include <- ifelse(tmp$Frequency >= 0.00005 & tmp$Frequency <= 0.0005 , "Yes", NA)
  tmp2 <- subset(tmp, include == "Yes")
  p4 <- ggplot(tmp2, mapping = aes(x=tmp2$UMAP_1, y = tmp2$UMAP_2)) +
    geom_point(tmp, mapping = aes(x=tmp$UMAP_1, y = tmp$UMAP_2, color = tmp[,"identity"]), size= 1) +
    geom_density_2d(color = "black", lwd=0.5, bins = 8) + 
    theme_minimal() +
    labs(color = "T cell subsets") +
     theme(text = element_text(size = 26), legend.key.width = unit(1, "cm"), legend.position = "right", legend.text = element_text(size=20)) + guides(colour = guide_legend(override.aes = list(size=8))) +
    scale_colour_manual(values = colors_anno_l2, breaks = levels(s$anno_l2)) +
    xlab("UMAP 1") + ylab("UMAP 2") + ggtitle(" Density of \n small clones")
  
  print(p4)

 DT::datatable(p4$data, 
              caption = ("EDF7b"),
              extensions = 'Buttons', 
              options = list( dom = 'Bfrtip',
              buttons = c( 'csv', 'excel')))
} else{
  print("Please request access to the BCR-Seq data.")
}

Extended Data Figure 7c

if(params$accessibility == "unlock"){

q1 <- quantContig(combined_T, cloneCall="gene+nt", exportTable = TRUE)
head(q1)
 

q1$Response <- "non-pCR"
q1$patient <- q1$values
q1$Response[grepl("NeoBCC007|NeoBCC008|NeoBCC012|NeoBCC017", q1$values)] <- "pCR"
q1$Response <- factor(q1$Response, levels = c("pCR", "non-pCR"))
q1$percent <- q1$contigs / q1$total * 100


g1 <- ggpaired(q1, x = "Response", y = "total", id="patient", color = "black", fill = "Response" , palette= c("#66c2a4", "#ccece6"), point.size = 2, xlab = "Response", ylab = " Number \n of TCR") + stat_compare_means( method = "wilcox.test", paired = FALSE, label.y = 4800, hide.ns = TRUE, label = "p.format", label.x.npc = "center", bracket.size = 1, comparison = list(c("pCR", "non-pCR")), size = 8)  +theme(text = element_text(size = 30)) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + ylim(c(0, 6000))


g2 <- ggpaired(q1, x = "Response", y = "percent", id="patient", color = "black", fill = "Response" ,palette= c("#66c2a4", "#ccece6"), point.size = 2, xlab = "Response", ylab = " Relative % of \n unique clonotypes") + stat_compare_means( method = "wilcox.test", paired = FALSE, label.y = 75, hide.ns = TRUE, label = "p.format", label.x.npc = "center", bracket.size = 1, comparison = list(c("pCR", "non-pCR")), size = 8)  +theme(text = element_text(size = 30)) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))



g3 <- ggpaired(q1, x = "Response", y = "contigs", id="patient",  color = "black", fill = "Response" , palette= c("#66c2a4", "#ccece6"), point.size = 2, xlab = "Response", ylab = " Number of unique \n clonotypes") + stat_compare_means( method = "wilcox.test", paired = FALSE, label.y = 2500, hide.ns = TRUE, label = "p.format", label.x.npc = "center", bracket.size = 1, comparison = list(c("pCR", "non-pCR")), size = 8)  +theme(text = element_text(size = 30)) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + ylim(c(0,3100))

} else{
  print("Please request access to the BCR-Seq data.")
}
if(params$accessibility == "unlock"){

norm_entropy <- function(observations) {
  observations <- observations[!is.na(observations)]
  p <- as.numeric(table(observations)) / length(observations)
  n <- length(p)
  if (n == 1) {
    return(0)
  }
  h <- -sum(p*log(p))
  h_max <- log(length(observations))
  return(h / h_max)
}
clonality <- rep(NA, length(names(combined_T)))
names(clonality) <- names(combined_T)
for (p in names(combined_T)){
clonality[p] <- norm_entropy(combined_T[[p]]$CTaa)
}

clonality <- as.data.frame(clonality)

clonality$Response <- "non-pCR"
clonality$Response[grepl("NeoBCC007|NeoBCC008|NeoBCC012|NeoBCC017", rownames(clonality))] <- "pCR"
clonality$patient <- rownames(clonality)
clonality$patient[clonality$patient == "BCC020"] <- "NeoBCC020"
clonality$Response <- factor(clonality$Response, levels = c("pCR", "non-pCR"))

g6 <- ggpaired(clonality, x = "Response", y = "clonality", id="patient",  color = "black", fill = "Response" , palette= c("#66c2a4", "#ccece6"), point.size = 2, xlab = "Response", ylab = " Repertoire \n clonality") + stat_compare_means( method = "wilcox.test", paired = FALSE, label.y = 0.88, hide.ns = TRUE, label = "p.format", label.x.npc = "center", bracket.size = 1, comparison = list(c("pCR", "non-pCR")), size = 8)  +theme(text = element_text(size = 30)) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))



p <- g1 + g3 +g2 + g6 + plot_layout(ncol=4, guides="collect")  &NoLegend()
print(p)
DT::datatable(g1$data, 
              caption = ("Extended Data Figure 7c"),
              extensions = 'Buttons', 
              options = list( dom = 'Bfrtip',
              buttons = c( 'csv', 'excel')))

DT::datatable(g6$data, 
              caption = ("EDF7c.2"),
              extensions = 'Buttons', 
              options = list( dom = 'Bfrtip',
              buttons = c( 'csv', 'excel')))

} else{
  print("Please request access to the BCR-Seq data.")
}

Extended Data Figure 7e

if(params$accessibility == "unlock"){

genes <- list( 
               "Hyperexpanded" = c("GNLY","KLRC2", "ZNF683", "RBFOX2","ITGA1", "ITM2C", "SYNGR1"),
               "Large"         = c("GEM", "VCAM", "TNFRSF9",  "PDCD1", "TIGIT", "CD74"),
               "Medium"        = c("CXCL13", "DUSP4", "PLEK"),
               "Small"         = c("LTB", "CCR7", "NEAT1", "IL7R", "LEF1")
               )

p <- SCpubr::do_DotPlot(sample = s, 
                   features = genes,
                   group.by = "cloneType",
                   font.size = 30, 
                   legend.length = 20,  
                   legend.type = "colorbar", 
                   dot.scale = 10,  
                   sequential.palette ="PRGn",
                        scale = TRUE,
                        sequential.direction = -1)

print(p)

DT::datatable(p$data, 
              caption = ("EDF7e"),
              extensions = 'Buttons', 
              options = list( dom = 'Bfrtip',
              buttons = c( 'csv', 'excel')))
} else{
  print("Please request access to the BCR-Seq data.")
}

Session Info

## R version 4.3.0 (2023-04-21)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so;  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Vienna
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] stringr_1.5.1         msigdbr_7.5.1         DOSE_3.26.2           org.Hs.eg.db_3.17.0  
##  [5] AnnotationDbi_1.62.2  IRanges_2.34.1        S4Vectors_0.38.2      Biobase_2.60.0       
##  [9] BiocGenerics_0.46.0   clusterProfiler_4.8.3 enrichplot_1.20.3     scales_1.3.0         
## [13] RColorBrewer_1.1-3    ggnewscale_0.4.10     tidyr_1.3.1           scRepertoire_1.10.1  
## [17] dittoSeq_1.12.2       canceRbits_0.1.6      ggpubr_0.6.0.999      ggplot2_3.5.1        
## [21] viridis_0.6.5         viridisLite_0.4.2     reshape2_1.4.4        tibble_3.2.1         
## [25] SCpubr_2.0.2          DT_0.32               patchwork_1.2.0       dplyr_1.1.4          
## [29] Seurat_5.0.3          SeuratObject_5.0.1    sp_2.1-3             
## 
## loaded via a namespace (and not attached):
##   [1] fs_1.6.4                    matrixStats_1.2.0           spatstat.sparse_3.0-3      
##   [4] bitops_1.0-7                HDO.db_0.99.1               httr_1.4.7                 
##   [7] doParallel_1.0.17           tools_4.3.0                 sctransform_0.4.1          
##  [10] backports_1.4.1             utf8_1.2.4                  R6_2.5.1                   
##  [13] vegan_2.6-4                 lazyeval_0.2.2              uwot_0.1.16                
##  [16] mgcv_1.9-1                  permute_0.9-7               withr_3.0.0                
##  [19] gridExtra_2.3               progressr_0.14.0            cli_3.6.2                  
##  [22] spatstat.explore_3.2-7      fastDummies_1.7.3           scatterpie_0.2.1           
##  [25] isoband_0.2.7               labeling_0.4.3              sass_0.4.9                 
##  [28] spatstat.data_3.0-4         ggridges_0.5.6              pbapply_1.7-2              
##  [31] yulab.utils_0.1.4           gson_0.1.0                  stringdist_0.9.12          
##  [34] parallelly_1.37.1           limma_3.56.2                RSQLite_2.3.5              
##  [37] VGAM_1.1-10                 rstudioapi_0.16.0           generics_0.1.3             
##  [40] gridGraphics_0.5-1          ica_1.0-3                   spatstat.random_3.2-3      
##  [43] crosstalk_1.2.1             car_3.1-2                   GO.db_3.17.0               
##  [46] Matrix_1.6-5                ggbeeswarm_0.7.2            fansi_1.0.6                
##  [49] abind_1.4-5                 lifecycle_1.0.4             edgeR_3.42.4               
##  [52] yaml_2.3.8                  carData_3.0-5               SummarizedExperiment_1.30.2
##  [55] qvalue_2.32.0               Rtsne_0.17                  blob_1.2.4                 
##  [58] grid_4.3.0                  promises_1.2.1              crayon_1.5.2               
##  [61] miniUI_0.1.1.1              lattice_0.22-6              cowplot_1.1.3              
##  [64] KEGGREST_1.40.1             pillar_1.9.0                knitr_1.45                 
##  [67] fgsea_1.26.0                GenomicRanges_1.52.1        future.apply_1.11.1        
##  [70] codetools_0.2-19            fastmatch_1.1-4             leiden_0.4.3.1             
##  [73] glue_1.7.0                  downloader_0.4              ggfun_0.1.5                
##  [76] data.table_1.15.2           treeio_1.24.3               vctrs_0.6.5                
##  [79] png_0.1-8                   spam_2.10-0                 gtable_0.3.5               
##  [82] assertthat_0.2.1            cachem_1.1.0                xfun_0.43                  
##  [85] S4Arrays_1.0.6              mime_0.12                   tidygraph_1.3.1            
##  [88] survival_3.5-8              DElegate_1.2.1              SingleCellExperiment_1.22.0
##  [91] pheatmap_1.0.12             iterators_1.0.14            fitdistrplus_1.1-11        
##  [94] ROCR_1.0-11                 nlme_3.1-164                ggtree_3.13.0.001          
##  [97] bit64_4.0.5                 RcppAnnoy_0.0.22            evd_2.3-6.1                
## [100] GenomeInfoDb_1.36.4         bslib_0.6.2                 irlba_2.3.5.1              
## [103] vipor_0.4.7                 KernSmooth_2.23-22          DBI_1.2.2                  
## [106] colorspace_2.1-0            ggrastr_1.0.2               tidyselect_1.2.1           
## [109] bit_4.0.5                   compiler_4.3.0              SparseM_1.81               
## [112] DelayedArray_0.26.7         plotly_4.10.4               shadowtext_0.1.3           
## [115] lmtest_0.9-40               digest_0.6.35               goftest_1.2-3              
## [118] spatstat.utils_3.0-4        rmarkdown_2.26              XVector_0.40.0             
## [121] htmltools_0.5.8             pkgconfig_2.0.3             sparseMatrixStats_1.12.2   
## [124] MatrixGenerics_1.12.3       highr_0.10                  fastmap_1.2.0              
## [127] rlang_1.1.4                 htmlwidgets_1.6.4           shiny_1.8.1                
## [130] farver_2.1.2                jquerylib_0.1.4             zoo_1.8-12                 
## [133] jsonlite_1.8.8              BiocParallel_1.34.2         GOSemSim_2.26.1            
## [136] RCurl_1.98-1.14             magrittr_2.0.3              GenomeInfoDbData_1.2.10    
## [139] ggplotify_0.1.2             dotCall64_1.1-1             munsell_0.5.1              
## [142] Rcpp_1.0.12                 evmix_2.12                  babelgene_22.9             
## [145] ape_5.8                     reticulate_1.35.0           truncdist_1.0-2            
## [148] stringi_1.8.4               ggalluvial_0.12.5           ggraph_2.2.1               
## [151] zlibbioc_1.46.0             MASS_7.3-60.0.1             plyr_1.8.9                 
## [154] parallel_4.3.0              listenv_0.9.1               ggrepel_0.9.5              
## [157] forcats_1.0.0               deldir_2.0-4                Biostrings_2.68.1          
## [160] graphlayouts_1.1.1          splines_4.3.0               tensor_1.5                 
## [163] locfit_1.5-9.9              igraph_2.0.3                spatstat.geom_3.2-9        
## [166] cubature_2.1.0              ggsignif_0.6.4              RcppHNSW_0.6.0             
## [169] evaluate_0.23               foreach_1.5.2               tweenr_2.0.3               
## [172] httpuv_1.6.15               RANN_2.6.1                  purrr_1.0.2                
## [175] polyclip_1.10-6             future_1.33.2               scattermore_1.2            
## [178] ggforce_0.4.2               broom_1.0.5                 xtable_1.8-4               
## [181] tidytree_0.4.6              RSpectra_0.16-1             rstatix_0.7.2              
## [184] later_1.3.2                 gsl_2.1-8                   aplot_0.2.3                
## [187] beeswarm_0.4.0              memoise_2.0.1               cluster_2.1.6              
## [190] powerTCR_1.20.0             globals_0.16.3